| Sample Name | Time point | Description | Replicates |
|---|---|---|---|
| time_0 | time_0 days post amputation | tissue close to wounding site on the fin (right after amputation) | 3 |
| time_1 | time_1 hours post amputation | tissue, posterior, to wounding site on the fin | 3 |
| time_2 | time_2 hours post amputation | tissue, posterior, to wounding site on the fin | 3 |
| time_3 | time_3 hours post amputation | tissue, posterior, to wounding site on the fin | 3 |
| time_4 | time_4 days post amputation | tissue, posterior, to wounding site on the fin | 3 |
| time_5 | time_5 days post amputation | tissue, posterior, to wounding site on the fin | 3 |
| time_6 | time_6 days post amputation | tissue, posterior, to wounding site on the fin | 3 |
| time_7 | time_7 days post amputation | tissue, posterior, to wounding site on the fin | 3 |
| time_8 | time_8 days post amputation | tissue, posterior, to wounding site on the fin | 3 |
| time_9 | time_9 days post amputation | tissue close to wounding site on the fin | 3 |
| Posterior | N/A | tissue at most posterior point of the fin | 3 |
| Amputated | N/A | Amputated fin tissue (right after amputation) | 3 |
# Read in count tables
counts.1249 <- readRDS(here("data","counts","MOLNG_1249_counts.RDS"))
counts.1332 <- readRDS(here("data","counts","MOLNG_1332_counts.RDS"))
# combine counts tables and reorder
counts.combined <- cbind(counts.1249,counts.1332)
counts.combined <- counts.combined[,c(22:24,1:3,43:45,16:18,40:42,4:15,19:21,37:39,25:36)]
colnames(counts.combined) <- c(paste("Amputated_R",1:3,sep=''),paste("time_0_R",1:3,sep=''),paste("time_1_R",1:3,sep=''),
paste("time_2_R",1:3,sep=''),paste("time_3_R",1:3,sep=''),paste("time_4_R",1:3,sep=''),
paste("time_5_R",1:3,sep=''),paste("time_6_R",1:3,sep=''),paste("time_7_R",1:3,sep='')
,paste("time_8_R",1:3,sep=''),paste("time_9_R",1:3,sep=''),colnames(counts.combined)[34:45])
# Get group info/samples names for targets
s.names <- colnames(counts.combined)
targets <- data.frame(s.names,c(rep("Amputated",3),rep("time_0",3),rep("time_1",3),rep("time_2",3),
rep("time_3",3),rep("time_4",3),rep("time_5",3),
rep("time_6",3),rep("time_7",3),rep("time_8",3),rep("time_9",3),
rep("Forebrain",3),rep("Hindbrain",3),
rep("Midbrain",3),rep("Posterior",3)))
colnames(targets) <- c("sample.name","group")
# functions
minpositive <- function(x)min(x[x > 0])
maxnegative <- function(x)max(x[x < 0])# select samples needed for this comparison (all time points)
counts.0 <- counts.combined[,c(4:6,1:3,7:33)]
targets.0 <- targets[c(4:6,1:3,7:33),]
# make the data frame to edgeR formate DGEList
z <- DGEList(counts=counts.0, group=targets.0$group)
# filtering
keep <- rowSums(cpm(z)>3) >= 3
table(keep)## keep
## FALSE TRUE
## 9339 15809
z <- z[keep, , keep.lib.sizes=FALSE]
#normalizes for RNA composition by finding a set of scaling
#factors for the library sizes that minimize the log-fold changes between the samples for most genes.
# TMM normalization
z <- calcNormFactors(z)
### Estimating Dispersions
#see page 18 of user guide for explanation of qCML
#estimate the qCML common dispersion
z <- estimateCommonDisp(z)
#qCML tagwise dispersions
z <- estimateTagwiseDisp(z)
# exactTest
# Compute genewise exact tests for differences in the mean between two groups of negative-binomially distributed counts
un.group <- unique(as.character(targets.0$group))
pair <- list(c(un.group[1],un.group[2]),c(un.group[1],un.group[3]),c(un.group[1],un.group[4]),
c(un.group[1],un.group[5]),c(un.group[1],un.group[6]),c(un.group[1],un.group[7]),
c(un.group[1],un.group[8]),c(un.group[1],un.group[9]),c(un.group[1],un.group[10]),
c(un.group[1],un.group[11]))
# run exactTest
res.0 <- lapply(pair, function(x){
exactTest(z,pair = x)
})
# adjust p-value via Benjamini & Hochberg (aka FDR)
res.0 <- lapply(res.0, function(x){ within(x$table, {
padj = p.adjust(x$table$PValue,method="BH")})
})de.genes.0.l <- list()
df.summary.0.l <- list()
for(i in 1:length(res.0)){
cat("\n\n")
pandoc.header(paste(un.group[1]," vs. ", un.group[i+1], sep=''), level=5)
padj.up <- res.0[[i]][(res.0[[i]]$padj < 0.01 & res.0[[i]]$logFC > log2(2)),]
padj.dn <- res.0[[i]][(res.0[[i]]$padj < 0.01 & res.0[[i]]$logFC < -log2(2)),]
de.genes.0.l[[i]] <- rbind(padj.up,padj.dn)
df.summary.0.l[[i]] <- df.temp <-c(nrow(padj.up),round(minpositive(padj.up$logFC),3),
round(max(padj.up$logFC),3),
nrow(padj.dn),
round(maxnegative(padj.dn$logFC),3),
round(min(padj.dn$logFC),3))
p <- ggplot(data = res.0[[i]], aes(x=res.0[[i]]$logCPM, y=res.0[[i]]$logFC)) +
theme_bw() + geom_point(size=1) + ylab("log2(FC)") + xlab("log2(CPM)") +
geom_point(data = padj.up, aes(x=padj.up$logCPM,y=padj.up$logFC), color="red", size=1) +
geom_point(data = padj.dn, aes(x=padj.dn$logCPM,y=padj.dn$logFC), color="blue", size=1) +
labs(title=paste(un.group[1]," vs. ", un.group[i+1], ": pval < 0.01 & log2(FC) > log2(2)",sep=''))
print(p)
}# Summary table
df.summary.0 <- do.call("rbind",df.summary.0.l)
colnames(df.summary.0) <- c("DE up","min +log2FC","max +log2FC","DE down","min -log2FC","max -log2FC")
rownames(df.summary.0) <- paste(un.group[1],".vs.", un.group[2:11], sep='')
names(de.genes.0.l) <- paste(un.group[1],".vs.", un.group[2:11], sep='')
datatable(df.summary.0, extensions = 'Buttons',
options = list(dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))# Get list of genes that are DE in at least 1 comparison
de.genes.names.0 <- do.call("c",lapply(de.genes.0.l, function(x){rownames(x)}))
de.genes.names.0.u <- unique(de.genes.names.0)
df.genes.0 <- do.call("cbind",lapply(res.0, function(x){x[rownames(x) %in% de.genes.names.0.u,]}))
colnames(df.genes.0)<-c(paste(un.group[1],".vs.", un.group[2], "_", colnames(df.genes.0)[1:4], sep=''),
paste(un.group[1],".vs.", un.group[3], "_", colnames(df.genes.0)[1:4], sep=''),
paste(un.group[1],".vs.", un.group[4], "_", colnames(df.genes.0)[1:4], sep=''),
paste(un.group[1],".vs.", un.group[5], "_", colnames(df.genes.0)[1:4], sep=''),
paste(un.group[1],".vs.", un.group[6], "_", colnames(df.genes.0)[1:4], sep=''),
paste(un.group[1],".vs.", un.group[7], "_", colnames(df.genes.0)[1:4], sep=''),
paste(un.group[1],".vs.", un.group[8], "_", colnames(df.genes.0)[1:4], sep=''),
paste(un.group[1],".vs.", un.group[9], "_", colnames(df.genes.0)[1:4], sep=''),
paste(un.group[1],".vs.", un.group[10], "_", colnames(df.genes.0)[1:4], sep=''),
paste(un.group[1],".vs.", un.group[11], "_", colnames(df.genes.0)[1:4], sep=''))# select samples needed for this comparison (all time points)
counts.a <- counts.combined[,1:33]
targets.a <- targets[1:33,]
# make the data frame to edgeR formate DGEList
z.a <- DGEList(counts=counts.a, group=targets.a$group)
# filtering
keep.a <- rowSums(cpm(z.a)>3) >= 3
table(keep.a)## keep.a
## FALSE TRUE
## 9339 15809
z.a <- z.a[keep.a, , keep.lib.sizes=FALSE]
#normalizes for RNA composition by finding a set of scaling
#factors for the library sizes that minimize the log-fold changes between the samples for most genes.
# TMM normalization
z.a <- calcNormFactors(z.a)
### Estimating Dispersions
#see page 18 of user guide for explanation of qCML
#estimate the qCML common dispersion
z.a <- estimateCommonDisp(z.a)
#qCML tagwise dispersions
z.a <- estimateTagwiseDisp(z.a)
# exactTest
# Compute genewise exact tests for differences in the mean between two groups of negative-binomially distributed counts
un.group.a <- unique(as.character(targets.a$group))
pair.a <- list(c(un.group.a[1],un.group.a[2]),c(un.group.a[1],un.group.a[3]),
c(un.group.a[1],un.group.a[4]),
c(un.group.a[1],un.group.a[5]),c(un.group.a[1],un.group.a[6]),
c(un.group.a[1],un.group.a[7]),
c(un.group.a[1],un.group.a[8]),c(un.group.a[1],un.group.a[9]),
c(un.group.a[1],un.group.a[10]),
c(un.group.a[1],un.group.a[11]))
# run exactTest
res.a <- lapply(pair.a, function(x){
exactTest(z.a,pair = x)
})
# adjust p-value via Benjamini & Hochberg (aka FDR)
res.a <- lapply(res.a, function(x){ within(x$table, {
padj = p.adjust(x$table$PValue,method="BH")})
})de.genes.a.l <- list()
df.summary.a.l <- list()
for(i in 1:length(res.a)){
cat("\n\n")
pandoc.header(paste(un.group.a[1]," vs. ", un.group.a[i+1], sep=''), level=5)
padj.up <- res.a[[i]][(res.a[[i]]$padj < 0.01 & res.a[[i]]$logFC > log2(2)),]
padj.dn <- res.a[[i]][(res.a[[i]]$padj < 0.01 & res.a[[i]]$logFC < -log2(2)),]
de.genes.a.l[[i]] <- rbind(padj.up,padj.dn)
df.summary.a.l[[i]] <- df.temp <-c(nrow(padj.up),round(minpositive(padj.up$logFC),3),
round(max(padj.up$logFC),3),
nrow(padj.dn),
round(maxnegative(padj.dn$logFC),3),
round(min(padj.dn$logFC),3))
p <- ggplot(data = res.a[[i]], aes(x=res.a[[i]]$logCPM, y=res.a[[i]]$logFC)) +
theme_bw() + geom_point(size=1) + ylab("log2(FC)") + xlab("log2(CPM)") +
geom_point(data = padj.up, aes(x=padj.up$logCPM,y=padj.up$logFC), color="red", size=1) +
geom_point(data = padj.dn, aes(x=padj.dn$logCPM,y=padj.dn$logFC), color="blue", size=1) +
labs(title=paste(un.group[1]," vs. ", un.group[i+1], ": pval < 0.01 & log2(FC) > log2(2)",sep=''))
print(p)
}# Summary table
df.summary.a <- do.call("rbind",df.summary.a.l)
colnames(df.summary.a) <- c("DE up","min +log2FC","max +log2FC","DE down","min -log2FC","max -log2FC")
rownames(df.summary.a) <- paste(un.group.a[1],".vs.", un.group.a[2:11], sep='')
names(de.genes.a.l) <- paste(un.group.a[1],".vs.", un.group.a[2:11], sep='')
datatable(df.summary.a, extensions = 'Buttons',
options = list(dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))# Get list of genes that are DE in at least 1 comparison
de.genes.names.a <- do.call("c",lapply(de.genes.a.l, function(x){rownames(x)}))
de.genes.names.a.u <- unique(de.genes.names.a)
df.genes.a <- do.call("cbind",lapply(res.a, function(x){x[rownames(x) %in% de.genes.names.a.u,]}))
colnames(df.genes.a)<-c(paste(un.group.a[1],".vs.", un.group.a[2],"_",colnames(df.genes.a)[1:4], sep=''),
paste(un.group.a[1],".vs.", un.group.a[3],"_",colnames(df.genes.a)[1:4], sep=''),
paste(un.group.a[1],".vs.", un.group.a[4],"_",colnames(df.genes.a)[1:4], sep=''),
paste(un.group.a[1],".vs.", un.group.a[5],"_",colnames(df.genes.a)[1:4], sep=''),
paste(un.group.a[1],".vs.", un.group.a[6],"_",colnames(df.genes.a)[1:4], sep=''),
paste(un.group.a[1],".vs.", un.group.a[7],"_",colnames(df.genes.a)[1:4], sep=''),
paste(un.group.a[1],".vs.",un.group.a[8],"_",colnames(df.genes.a)[1:4], sep=''),
paste(un.group.a[1],".vs.",un.group.a[9],"_",colnames(df.genes.a)[1:4], sep=''),
paste(un.group.a[1],".vs.",un.group.a[10],"_",colnames(df.genes.a)[1:4], sep=''),
paste(un.group.a[1],".vs.",un.group.a[11],"_",colnames(df.genes.a)[1:4], sep=''))de.genes.0.l.2 <- lapply(de.genes.0.l, function(x) {x$gene_name <- rownames(x);return(x)})
de.genes.a.l.2 <- lapply(de.genes.a.l, function(x) {x$gene_name <- rownames(x);return(x)})
df_both <- Map(merge, de.genes.0.l.2, de.genes.a.l.2, by="gene_name")
sum.t <- data.frame(time_0=sapply(de.genes.0.l.2, nrow), Amp=sapply(de.genes.a.l.2, nrow),
both=sapply(df_both, nrow))
datatable(sum.t, extensions = 'Buttons',
options = list(dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))# using edgeR normalized counts pseudo.counts
norm.counts <- z$pseudo.counts
# Design matrix: time, replicates and 1 group
ma.design <- data.frame(time=rep(c(0,3,6,14,c(1,2,3,4,7,18)*24),each=3), replicates = rep(c(1:10), each = 3),
group=rep(1,30))
rownames(ma.design) <- colnames(norm.counts)[c(1:3,7:33)]
ma.design
# make the design matrix
design <- make.design.matrix(ma.design)
# run p.vector: performs a regression fit for each gene taking all variables present in the model
fit <- p.vector(data=norm.counts, design=design, Q=0.05, counts=T) # 0.05 = 10356, 0.01 = , 0.001 = 5728
#saveRDS(fit,file="/home/wew/Nothobranchius_furzeri/dag.analysis/wew3/data/rdata/fit_ftc_0dpa.RDS")
#fit <- readRDS("/home/wew/Nothobranchius_furzeri/dag.analysis/wew3/data/rdata/fit_ftc_0dpa.RDS")
# Run T.fit selects the best regression model for each gene using stepwise regression
tstep <- T.fit(fit)
tstep.b2 <- T.fit(fit, step.method = "two.ways.backward")
#saveRDS(tstep,file="/home/wew/Nothobranchius_furzeri/dag.analysis/wew3/data/rdata/tstep_ftc_0dpa.RDS")
#tstep <- readRDS("/home/wew/Nothobranchius_furzeri/dag.analysis/wew3/data/rdata/tstep_ftc_0dpa.RDS")
tstep$g # 5728
sigs <- get.siggenes(tstep, rsq = 0.6, vars = "all") # all vs. each??
sigs$sig.genes$g # 985
t.fit <- as.data.frame(fit$dat)
t.fit$genes <- rownames(t.fit)
t.step <- as.data.frame(tstep$dat)
t.step$genes <- rownames(t.step)
t <- sigs$sig.genes$sig.profiles
t$genes <- rownames(t)# Read in RPKMs
fin_tc_rpkm_filtered <- read.csv(here("results","tables","fin_tc_rpkm_filtered.csv"))
fin_tc_rpkm_filtered <- fin_tc_rpkm_filtered[,c(1:4,8:34)]
colnames(fin_tc_rpkm_filtered) <- c("X", colnames(counts.a)[4:33])
fin_tc_rpkm_filtered.log2 <- log2(fin_tc_rpkm_filtered[,2:31]+0.1) # log2 transform and add 1 so no 0
rownames(fin_tc_rpkm_filtered.log2) <- fin_tc_rpkm_filtered$X
# get significant genes
sig.log2fpkm <- fin_tc_rpkm_filtered.log2[match(rownames(sigs$sig.genes$sig.profiles),rownames(fin_tc_rpkm_filtered.log2)),]
# use Mfuzz to get clusters
da <- as.matrix(sig.log2fpkm) # rename
dat <- ExpressionSet(da) # convert to expression set
dat <- fill.NA(dat, mode="mean")
dat.s <- standardise(dat) # standarize, average is 0 w/ SDs
m1 <- mestimate(dat.s)# soft clustering, fuzzy c-means algorithm
cl <- mfuzz(dat.s,c=8,m=m1)
mfuzz.plot2(dat.s,cl=cl,mfrow=c(4,2),time.labels=colnames(da),x11=F,las=2,single=F,colo="fancy",min.mem=0,
ylab="log2FPKM",centre=T)# plot clusters in a heat map
clusters <- cl$cluster
clusters <- data.frame(cbind(names(clusters),as.numeric(cl$cluster)))
colnames(clusters) <- c("V1","V2")
# get mean of the genes in the clusters
cluster.mean=function(n,da)
{
gn=da[rownames(da) %in% clusters$V1[clusters$V2==n],]
#print(head(gn))
gn.scale=apply(gn,1,scale)
#print(head(gn.scale))
gn.mean=apply(gn.scale,1,mean)
#print(head(gn.mean))
names(gn.mean)=colnames(gn)
gn.mean
}
wt.meanls <- lapply(1:8,cluster.mean,da)
names(wt.meanls) <- paste0("cluster_",1:8)
wt.reps <- do.call(rbind,wt.meanls)
# Now plot
cat("\n\n")pandoc.header("Cluster by row", level=6)pheatmap(wt.reps, cluster_cols = FALSE)cat("\n\n")pandoc.header("Cluster by row and column", level=6)pheatmap(wt.reps, cluster_cols = TRUE)# soft clustering, fuzzy c-means algorithm
cl <- mfuzz(dat.s,c=16,m=m1)
mfuzz.plot2(dat.s,cl=cl,mfrow=c(4,4),time.labels=colnames(da),x11=F,las=2,single=F,colo="fancy",min.mem=0,
ylab="log2FPKM",centre=T)# plot clusters in a heat map
clusters <- cl$cluster
clusters <- data.frame(cbind(names(clusters),as.numeric(cl$cluster)))
colnames(clusters) <- c("V1","V2")
# get mean of the genes in the clusters
cluster.mean=function(n,da)
{
gn=da[rownames(da) %in% clusters$V1[clusters$V2==n],]
#print(head(gn))
gn.scale=apply(gn,1,scale)
#print(head(gn.scale))
gn.mean=apply(gn.scale,1,mean)
#print(head(gn.mean))
names(gn.mean)=colnames(gn)
gn.mean
}
wt.meanls <- lapply(1:16,cluster.mean,da)
names(wt.meanls) <- paste0("cluster_",1:16)
wt.reps <- do.call(rbind,wt.meanls)
# Now plot
cat("\n\n")pandoc.header("Cluster by row", level=6)pheatmap(wt.reps, cluster_cols = FALSE)cat("\n\n")pandoc.header("Cluster by row and column", level=6)pheatmap(wt.reps, cluster_cols = TRUE)# Read in RPKMs
fin_tc_rpkm_filtered <- read.csv(here("results","tables","fin_tc_rpkm_filtered.csv"))
fin_tc_rpkm_filtered <- fin_tc_rpkm_filtered[,c(1:4,8:34)]
colnames(fin_tc_rpkm_filtered) <- c("X", colnames(counts.a)[4:33])
fin_tc_rpkm_filtered.log2 <- log2(fin_tc_rpkm_filtered[,2:31]+0.1) # log2 transform and add 1 so no 0
rownames(fin_tc_rpkm_filtered.log2) <- fin_tc_rpkm_filtered$X
# mean of every 3 columns
n <- 1:ncol(fin_tc_rpkm_filtered.log2)
ind <- data.frame(matrix(c(n, rep(NA, 3 - ncol(fin_tc_rpkm_filtered.log2)%%3)), byrow=F, nrow=3))
nonna <- sapply(ind, function(x) all(!is.na(x)))
ind <- ind[, nonna]
fin_tc_rpkm_filtered.log2.mean <- do.call(cbind, lapply(ind, function(i)rowMeans(fin_tc_rpkm_filtered.log2[, i])))
colnames(fin_tc_rpkm_filtered.log2.mean) <- c("mean.0dpa", "mean.3hpa", "mean.6hpa", "mean.14hpa", "mean.1dpa",
"mean.2dpa", "mean.3dpa", "mean.4dpa", "mean.7dpa", "mean.18dpa")
# get significant genes
sig.log2fpkm.mean <- fin_tc_rpkm_filtered.log2.mean[match(rownames(sigs$sig.genes$sig.profiles),rownames(fin_tc_rpkm_filtered.log2.mean)),]
# use Mfuzz to get clusters
da.mean <- as.matrix(sig.log2fpkm.mean) # rename
dat.mean <- ExpressionSet(da.mean) # convert to expression set
#da.meant <- fill.NA(dat.mean, mode="knn")
dat.s.mean <- standardise(dat.mean) # standarize, average is 0 w/ SDs
m1.mean <- mestimate(dat.s.mean)# soft clustering, fuzzy c-means algorithm
cl.mean <- mfuzz(dat.s.mean,c=8,m=m1.mean)
mfuzz.plot2(dat.s.mean,cl=cl.mean,mfrow=c(4,2),time.labels=colnames(da.mean),x11=F,las=2,single=F,
colo="fancy",min.mem=0,ylab="log2FPKM",centre=T)# plot clusters in a heat map
clusters.mean <- cl.mean$cluster
clusters.mean <- data.frame(cbind(names(clusters.mean),as.numeric(cl.mean$cluster)))
colnames(clusters.mean) <- c("V1","V2")
# get mean of the genes in the clusters
cluster.mean=function(n,da)
{
gn=da[rownames(da) %in% clusters.mean$V1[clusters.mean$V2==n],]
#print(head(gn))
gn.scale=apply(gn,1,scale)
#print(head(gn.scale))
gn.mean=apply(gn.scale,1,mean)
#print(head(gn.mean))
names(gn.mean)=colnames(gn)
gn.mean
}
wt.meanls.mean <- lapply(1:8,cluster.mean,da.mean)
names(wt.meanls.mean) <- paste0("cluster_",1:8)
wt.mean <- do.call(rbind,wt.meanls.mean)
# Now plot
cat("\n\n")pandoc.header("Cluster by row", level=6)pheatmap(wt.mean, cluster_cols = FALSE)cat("\n\n")pandoc.header("Cluster by row and column", level=6)pheatmap(wt.mean, cluster_cols = TRUE)# soft clustering, fuzzy c-means algorithm
cl.mean <- mfuzz(dat.s.mean,c=16,m=m1.mean)
mfuzz.plot2(dat.s.mean,cl=cl.mean,mfrow=c(4,4),time.labels=colnames(da.mean),x11=F,las=2,single=F,
colo="fancy",min.mem=0,ylab="log2FPKM",centre=T)# plot clusters in a heat map
clusters.mean <- cl.mean$cluster
clusters.mean <- data.frame(cbind(names(clusters.mean),as.numeric(cl.mean$cluster)))
colnames(clusters.mean) <- c("V1","V2")
# get mean of the genes in the clusters
cluster.mean=function(n,da)
{
gn=da[rownames(da) %in% clusters.mean$V1[clusters.mean$V2==n],]
#print(head(gn))
gn.scale=apply(gn,1,scale)
#print(head(gn.scale))
gn.mean=apply(gn.scale,1,mean)
#print(head(gn.mean))
names(gn.mean)=colnames(gn)
gn.mean
}
wt.meanls.mean <- lapply(1:16,cluster.mean,da.mean)
names(wt.meanls.mean) <- paste0("cluster_",1:16)
wt.mean <- do.call(rbind,wt.meanls.mean)
# Now plot
cat("\n\n")pandoc.header("Cluster by row", level=6)pheatmap(wt.mean, cluster_cols = FALSE)cat("\n\n")pandoc.header("Cluster by row and column", level=6)pheatmap(wt.mean, cluster_cols = TRUE)# using edgeR normalized counts pseudo.counts
norm.counts <- z.a$pseudo.counts
# Design matrix: time, replicates and 1 group
ma.design <- data.frame(time=rep(c(0,3,6,14,c(1,2,3,4,7,18)*24),each=3), replicates = rep(c(1:10), each = 3),
group=rep(1,30))
rownames(ma.design) <- colnames(norm.counts)[c(1:3,7:33)]
ma.design
# make the design matrix
design <- make.design.matrix(ma.design)
# run p.vector: performs a regression fit for each gene taking all variables present in the model
#fit <- p.vector(data=norm.counts, design=design, Q=0.001, counts=T) # 0.05 = , 0.01 = , 0.001 = 5728
#saveRDS(fit,file="/home/wew/Nothobranchius_furzeri/dag.analysis/wew3/data/rdata/fit_ftc_amp.RDS")
fit <- readRDS(here("data","rdata","fit_ftc_amp.RDS"))
# Run T.fit selects the best regression model for each gene using stepwise regression
#tstep <- T.fit(fit)
#saveRDS(tstep,file="/home/wew/Nothobranchius_furzeri/dag.analysis/wew3/data/rdata/tstep_ftc_amp.RDS")
tstep <- readRDS(here("data","rdata","tstep_ftc_amp.RDS"))
tstep$g # 4360
sigs <- get.siggenes(tstep, rsq = 0.6, vars = "all") # all vs. each??
sigs$sig.genes$g # 690# Read in RPKMs
fin_tc_rpkm_filtered <- read.csv(here("results","tables","fin_tc_rpkm_filtered.csv"))
fin_tc_rpkm_filtered <- fin_tc_rpkm_filtered[,c(1,5:34)]
colnames(fin_tc_rpkm_filtered) <- c("X", colnames(counts.a)[c(1:3,7:33)])
fin_tc_rpkm_filtered.log2 <- log2(fin_tc_rpkm_filtered[,2:31]+0.1) # log2 transform and add 1 so no 0
rownames(fin_tc_rpkm_filtered.log2) <- fin_tc_rpkm_filtered$X
# get significant genes
sig.log2fpkm <- fin_tc_rpkm_filtered.log2[match(rownames(sigs$sig.genes$sig.profiles),rownames(fin_tc_rpkm_filtered.log2)),]
# use Mfuzz to get clusters
da <- as.matrix(sig.log2fpkm) # rename
dat <- ExpressionSet(da) # convert to expression set
dat <- fill.NA(dat, mode="mean")
dat.s <- standardise(dat) # standarize, average is 0 w/ SDs
m1 <- mestimate(dat.s)# soft clustering, fuzzy c-means algorithm
cl <- mfuzz(dat.s,c=8,m=m1)
mfuzz.plot2(dat.s,cl=cl,mfrow=c(4,2),time.labels=colnames(da),x11=F,las=2,single=F,colo="fancy",min.mem=0,
ylab="log2FPKM",centre=T)# plot clusters in a heat map
clusters <- cl$cluster
clusters <- data.frame(cbind(names(clusters),as.numeric(cl$cluster)))
colnames(clusters) <- c("V1","V2")
# get mean of the genes in the clusters
cluster.mean=function(n,da)
{
gn=da[rownames(da) %in% clusters$V1[clusters$V2==n],]
#print(head(gn))
gn.scale=apply(gn,1,scale)
#print(head(gn.scale))
gn.mean=apply(gn.scale,1,mean)
#print(head(gn.mean))
names(gn.mean)=colnames(gn)
gn.mean
}
wt.meanls <- lapply(1:8,cluster.mean,da)
names(wt.meanls) <- paste0("cluster_",1:8)
wt.reps <- do.call(rbind,wt.meanls)
# Now plot
cat("\n\n")pandoc.header("Cluster by row", level=6)pheatmap(wt.reps, cluster_cols = FALSE)cat("\n\n")pandoc.header("Cluster by row and column", level=6)pheatmap(wt.reps, cluster_cols = TRUE)# soft clustering, fuzzy c-means algorithm
cl <- mfuzz(dat.s,c=16,m=m1)
mfuzz.plot2(dat.s,cl=cl,mfrow=c(4,4),time.labels=colnames(da),x11=F,las=2,single=F,colo="fancy",min.mem=0,
ylab="log2FPKM",centre=T)# plot clusters in a heat map
clusters <- cl$cluster
clusters <- data.frame(cbind(names(clusters),as.numeric(cl$cluster)))
colnames(clusters) <- c("V1","V2")
# get mean of the genes in the clusters
cluster.mean=function(n,da)
{
gn=da[rownames(da) %in% clusters$V1[clusters$V2==n],]
#print(head(gn))
gn.scale=apply(gn,1,scale)
#print(head(gn.scale))
gn.mean=apply(gn.scale,1,mean)
#print(head(gn.mean))
names(gn.mean)=colnames(gn)
gn.mean
}
wt.meanls <- lapply(1:16,cluster.mean,da)
names(wt.meanls) <- paste0("cluster_",1:16)
wt.reps <- do.call(rbind,wt.meanls)
# Now plot
cat("\n\n")pandoc.header("Cluster by row", level=6)pheatmap(wt.reps, cluster_cols = FALSE)cat("\n\n")pandoc.header("Cluster by row and column", level=6)pheatmap(wt.reps, cluster_cols = TRUE)# Read in RPKMs
fin_tc_rpkm_filtered <- read.csv(here("results","tables","fin_tc_rpkm_filtered.csv"))
fin_tc_rpkm_filtered <- fin_tc_rpkm_filtered[,c(1,5:34)]
colnames(fin_tc_rpkm_filtered) <- c("X", colnames(counts.a)[c(1:3,7:33)])
fin_tc_rpkm_filtered.log2 <- log2(fin_tc_rpkm_filtered[,2:31]+0.1) # log2 transform and add 1 so no 0
rownames(fin_tc_rpkm_filtered.log2) <- fin_tc_rpkm_filtered$X
# mean of every 3 columns
n <- 1:ncol(fin_tc_rpkm_filtered.log2)
ind <- data.frame(matrix(c(n, rep(NA, 3 - ncol(fin_tc_rpkm_filtered.log2)%%3)), byrow=F, nrow=3))
nonna <- sapply(ind, function(x) all(!is.na(x)))
ind <- ind[, nonna]
fin_tc_rpkm_filtered.log2.mean <- do.call(cbind, lapply(ind, function(i)rowMeans(fin_tc_rpkm_filtered.log2[, i])))
colnames(fin_tc_rpkm_filtered.log2.mean) <- c("mean.Amp", "mean.3hpa", "mean.6hpa", "mean.14hpa", "mean.1dpa",
"mean.2dpa", "mean.3dpa", "mean.4dpa", "mean.7dpa", "mean.18dpa")
# get significant genes
sig.log2fpkm.mean <- fin_tc_rpkm_filtered.log2.mean[match(rownames(sigs$sig.genes$sig.profiles),rownames(fin_tc_rpkm_filtered.log2.mean)),]
# use Mfuzz to get clusters
da.mean <- as.matrix(sig.log2fpkm.mean) # rename
dat.mean <- ExpressionSet(da.mean) # convert to expression set
#da.meant <- fill.NA(dat.mean, mode="knn")
dat.s.mean <- standardise(dat.mean) # standarize, average is 0 w/ SDs
m1.mean <- mestimate(dat.s.mean)# soft clustering, fuzzy c-means algorithm
cl.mean <- mfuzz(dat.s.mean,c=8,m=m1.mean)
mfuzz.plot2(dat.s.mean,cl=cl.mean,mfrow=c(4,2),time.labels=colnames(da.mean),x11=F,las=2,single=F,
colo="fancy",min.mem=0,ylab="log2FPKM",centre=T)# plot clusters in a heat map
clusters.mean <- cl.mean$cluster
clusters.mean <- data.frame(cbind(names(clusters.mean),as.numeric(cl.mean$cluster)))
colnames(clusters.mean) <- c("V1","V2")
# get mean of the genes in the clusters
cluster.mean=function(n,da)
{
gn=da[rownames(da) %in% clusters.mean$V1[clusters.mean$V2==n],]
#print(head(gn))
gn.scale=apply(gn,1,scale)
#print(head(gn.scale))
gn.mean=apply(gn.scale,1,mean)
#print(head(gn.mean))
names(gn.mean)=colnames(gn)
gn.mean
}
wt.meanls.mean <- lapply(1:8,cluster.mean,da.mean)
names(wt.meanls.mean) <- paste0("cluster_",1:8)
wt.mean <- do.call(rbind,wt.meanls.mean)
# Now plot
cat("\n\n")pandoc.header("Cluster by row", level=6)pheatmap(wt.mean, cluster_cols = FALSE)cat("\n\n")pandoc.header("Cluster by row and column", level=6)pheatmap(wt.mean, cluster_cols = TRUE)# soft clustering, fuzzy c-means algorithm
cl.mean <- mfuzz(dat.s.mean,c=16,m=m1.mean)
mfuzz.plot2(dat.s.mean,cl=cl.mean,mfrow=c(4,4),time.labels=colnames(da.mean),x11=F,las=2,single=F,
colo="fancy",min.mem=0,ylab="log2FPKM",centre=T)# plot clusters in a heat map
clusters.mean <- cl.mean$cluster
clusters.mean <- data.frame(cbind(names(clusters.mean),as.numeric(cl.mean$cluster)))
colnames(clusters.mean) <- c("V1","V2")
# get mean of the genes in the clusters
cluster.mean=function(n,da)
{
gn=da[rownames(da) %in% clusters.mean$V1[clusters.mean$V2==n],]
#print(head(gn))
gn.scale=apply(gn,1,scale)
#print(head(gn.scale))
gn.mean=apply(gn.scale,1,mean)
#print(head(gn.mean))
names(gn.mean)=colnames(gn)
gn.mean
}
wt.meanls.mean <- lapply(1:16,cluster.mean,da.mean)
names(wt.meanls.mean) <- paste0("cluster_",1:16)
wt.mean <- do.call(rbind,wt.meanls.mean)
# Now plot
cat("\n\n")pandoc.header("Cluster by row", level=6)pheatmap(wt.mean, cluster_cols = FALSE)cat("\n\n")pandoc.header("Cluster by row and column", level=6)pheatmap(wt.mean, cluster_cols = TRUE)## [1] "R version 3.4.3 (2017-11-30)"
## [2] "Platform: x86_64-w64-mingw32/x64 (64-bit)"
## [3] "Running under: Windows 10 x64 (build 16299)"
## [4] ""
## [5] "Matrix products: default"
## [6] ""
## [7] "locale:"
## [8] "[1] LC_COLLATE=English_United States.1252 "
## [9] "[2] LC_CTYPE=English_United States.1252 "
## [10] "[3] LC_MONETARY=English_United States.1252"
## [11] "[4] LC_NUMERIC=C "
## [12] "[5] LC_TIME=English_United States.1252 "
## [13] ""
## [14] "attached base packages:"
## [15] " [1] stats4 tcltk parallel stats graphics grDevices utils "
## [16] " [8] datasets methods base "
## [17] ""
## [18] "other attached packages:"
## [19] " [1] GenomicAlignments_1.14.1 SummarizedExperiment_1.8.1"
## [20] " [3] DelayedArray_0.4.1 matrixStats_0.53.0 "
## [21] " [5] GenomicFeatures_1.30.2 AnnotationDbi_1.40.0 "
## [22] " [7] rtracklayer_1.38.3 Rsamtools_1.30.0 "
## [23] " [9] Biostrings_2.46.0 XVector_0.18.0 "
## [24] "[11] GenomicRanges_1.30.0 GenomeInfoDb_1.14.0 "
## [25] "[13] IRanges_2.12.0 S4Vectors_0.16.0 "
## [26] "[15] here_0.1 Mfuzz_2.38.0 "
## [27] "[17] DynDoc_1.56.0 widgetTools_1.56.0 "
## [28] "[19] e1071_1.6-8 Biobase_2.38.0 "
## [29] "[21] BiocGenerics_0.24.0 maSigPro_1.50.0 "
## [30] "[23] DT_0.3 reshape_0.8.7 "
## [31] "[25] edgeR_3.20.7 limma_3.34.6 "
## [32] "[27] pheatmap_1.0.8 ggplot2_2.2.1 "
## [33] "[29] biomaRt_2.34.2 pander_0.6.1 "
## [34] "[31] knitr_1.19 "
## [35] ""
## [36] "loaded via a namespace (and not attached):"
## [37] " [1] httr_1.3.1 RMySQL_0.10.13 jsonlite_1.5 "
## [38] " [4] bit64_0.9-7 shiny_1.0.5 assertthat_0.2.0 "
## [39] " [7] blob_1.1.0 GenomeInfoDbData_1.0.0 yaml_2.1.16 "
## [40] "[10] progress_1.1.2 pillar_1.1.0 RSQLite_2.0 "
## [41] "[13] backports_1.1.2 lattice_0.20-35 digest_0.6.14 "
## [42] "[16] RColorBrewer_1.1-2 tkWidgets_1.56.0 colorspace_1.3-2 "
## [43] "[19] httpuv_1.3.5 htmltools_0.3.6 Matrix_1.2-12 "
## [44] "[22] plyr_1.8.4 XML_3.98-1.9 venn_1.5 "
## [45] "[25] zlibbioc_1.24.0 xtable_1.8-2 scales_0.5.0 "
## [46] "[28] BiocParallel_1.12.0 tibble_1.4.2 lazyeval_0.2.1 "
## [47] "[31] mime_0.5 magrittr_1.5 mclust_5.4 "
## [48] "[34] memoise_1.1.0 evaluate_0.10.1 MASS_7.3-48 "
## [49] "[37] class_7.3-14 tools_3.4.3 prettyunits_1.0.2 "
## [50] "[40] stringr_1.2.0 munsell_0.4.3 locfit_1.5-9.1 "
## [51] "[43] compiler_3.4.3 rlang_0.1.6 grid_3.4.3 "
## [52] "[46] RCurl_1.95-4.10 htmlwidgets_1.0 crosstalk_1.0.0 "
## [53] "[49] labeling_0.3 bitops_1.0-6 rmarkdown_1.8 "
## [54] "[52] gtable_0.2.0 DBI_0.7 R6_2.2.2 "
## [55] "[55] bit_1.1-12 rprojroot_1.3-2 stringi_1.1.6 "
## [56] "[58] Rcpp_0.12.15 "